package com.yzq.os.spider.v.controller;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.servlet.ModelAndView;
import org.springframework.web.servlet.view.RedirectView;

import com.yzq.os.spider.v.Constants;
import com.yzq.os.spider.v.domain.Record;
import com.yzq.os.spider.v.service.CrawlService;
import com.yzq.os.spider.v.service.domain.SpiderRecordService;
import com.yzq.os.spider.v.service.domain.SearchEngineService;
import com.yzq.os.spider.v.service.domain.ServerService;

/**
 * Controller that runs, stops and inspects crawl tasks, both on the local
 * server and on other servers of the cluster (via their HTTP admin
 * interfaces).
 * 
 * @author 苑志强(xingyu_yzq@163.com)
 * 
 */
@Controller
@RequestMapping("/spider")
public class SpiderRecordController {

	private static Logger logger = Logger
			.getLogger(SpiderRecordController.class);

	/**
	 * URL template of the interface that stops a search engine's crawl task
	 * on one server of the cluster.
	 */
	private static final String STOP_LOCAL_THREAD_INTERFACE = "http://<SERVER_IP_ADDRESS>:8080/<CONTEXT_PATH>/admin/spider/stopLocal/<SEARCH_ENGINE_ID>";

	/**
	 * URL template of the interface that runs a search engine's crawl task on
	 * one server of the cluster.
	 */
	private static final String RUN_LOCAL_THREAD_INTERFACE = "http://<SERVER_IP_ADDRESS>:8080/<CONTEXT_PATH>/admin/spider/runLocal/<SEARCH_ENGINE_ID>_<DATE>_<FIND_URL_SIZE>_<MIN_THREAD_NUM>_<MAX_THREAD_NUM>_<ARRAY_QUEUE_SIZE>";

	@Autowired
	private SearchEngineService searchEngineService;

	@Autowired
	private SpiderRecordService crawJobService;

	@Autowired
	private ServerService serverService;

	/**
	 * Renders the "run crawl task" form page.
	 * 
	 * @return model-and-view carrying the available search engines and servers
	 */
	@RequestMapping("/form")
	public ModelAndView form() {
		Map<String, Object> model = new HashMap<String, Object>();
		model.put("engines", searchEngineService.findUIViews());
		model.put("servers", serverService.findAll());
		return new ModelAndView("/admin/spider/form", model);
	}

	/**
	 * Renders the page for dropping a crawl-result table (one search engine,
	 * one day).
	 * 
	 * @return model-and-view carrying the search engines and existing tables
	 * @throws ParseException
	 */
	@RequestMapping("/drop")
	public ModelAndView drop() throws ParseException {
		List<String> tables = crawJobService.findExistedTables();
		Map<String, Object> model = new HashMap<String, Object>();
		model.put("engines", searchEngineService.findUIViews());
		model.put("tables", tables);
		return new ModelAndView("/admin/spider/drop", model);
	}

	/**
	 * Drops the crawl-result table of the given search engine and date, if
	 * that table exists in the online database.
	 * 
	 * @param searchEngineId id of the search engine whose table is dropped
	 * @param date date part of the table name
	 * @return redirect back to the drop page
	 * @throws ParseException if the date cannot be turned into a table name
	 */
	@RequestMapping(value = "/drop", method = RequestMethod.POST)
	public RedirectView drop(int searchEngineId, String date)
			throws ParseException {
		String tableName = SpiderRecordService.parseTableName(searchEngineId,
				date);
		if (crawJobService.isExistTableOnlineDatabase(tableName)) {
			crawJobService.dropOnlineTable(tableName);
		}
		return new RedirectView("drop");
	}

	/**
	 * Tells one server of the cluster to run a search engine's crawl task by
	 * calling its {@code runLocal} HTTP interface.
	 * 
	 * @param ip address of the target server
	 * @param searchEngineId id of the search engine to crawl
	 * @param date crawl date, formatted per {@code Constants.DATE_PATTERN}
	 * @param findUrlSize number of URLs to fetch per batch
	 * @param minThreadNum minimum crawl thread count
	 * @param maxThreadNum maximum crawl thread count
	 * @param arrayQueueSize size of the work queue
	 * @return redirect to the crawl form page
	 * @throws IOException if the remote interface cannot be reached or read
	 */
	@RequestMapping(method = RequestMethod.POST)
	public RedirectView runRemote(String ip, int searchEngineId, String date,
			int findUrlSize, int minThreadNum, int maxThreadNum,
			int arrayQueueSize) throws IOException {
		// Build the remote run URL. The placeholder token must include its
		// angle brackets ("<CONTEXT_PATH>"); the original bare "CONTEXT_PATH"
		// was never matched by replaceEach, leaving the literal placeholder
		// in the URL and breaking the remote call.
		String u = StringUtils.replaceEach(
				RUN_LOCAL_THREAD_INTERFACE,
				new String[] { "<SERVER_IP_ADDRESS>", "<CONTEXT_PATH>",
						"<SEARCH_ENGINE_ID>", "<DATE>", "<FIND_URL_SIZE>",
						"<MIN_THREAD_NUM>", "<MAX_THREAD_NUM>",
						"<ARRAY_QUEUE_SIZE>" },
				new String[] { ip, serverService.getContextPath(),
						String.valueOf(searchEngineId), date,
						String.valueOf(findUrlSize),
						String.valueOf(minThreadNum),
						String.valueOf(maxThreadNum),
						String.valueOf(arrayQueueSize) });
		logger.info("Make run remote url:[" + u + "]");
		String message = callRemoteInterface(u);
		logger.info("Run remote crawl thread ip:[" + ip + "],searchEngineId:["
				+ searchEngineId + "],message:[" + message + "]");

		return new RedirectView("spider/form");
	}

	/**
	 * Opens the given URL and reads the whole response body as a string.
	 * 
	 * @param u fully substituted interface URL
	 * @return the response body
	 * @throws IOException if the connection cannot be opened or read
	 */
	private String callRemoteInterface(String u) throws IOException {
		URL url = new URL(u);
		URLConnection conn = url.openConnection();
		InputStream in = conn.getInputStream();
		try {
			return IOUtils.toString(in);
		} finally {
			// close in finally so a failed read cannot leak the stream
			IOUtils.closeQuietly(in);
		}
	}

	/**
	 * Runs a search engine's crawl task on the local server.
	 * 
	 * @param searchEngineId id of the search engine to crawl
	 * @param date crawl date string; blank means "now"
	 * @param findUrlSize number of URLs to fetch per batch
	 * @param minThreadNum minimum crawl thread count
	 * @param maxThreadNum maximum crawl thread count
	 * @param arrayQueueSize size of the work queue
	 * @return "OK" once the crawl has been triggered
	 * @throws ParseException if {@code date} does not match
	 *             {@code Constants.DATE_PATTERN}
	 */
	@RequestMapping("/runLocal/{searchEngineId}_{date}_{findUrlSize}_{minThreadNum}_{maxThreadNum}_{arrayQueueSize}")
	@ResponseBody
	public String runLocal(@PathVariable int searchEngineId,
			@PathVariable String date, @PathVariable int findUrlSize,
			@PathVariable int minThreadNum, @PathVariable int maxThreadNum,
			@PathVariable int arrayQueueSize) throws ParseException {
		Date markDate = new Date();// set default value
		if (StringUtils.isNotBlank(date)) {
			markDate = DateUtils.parseDate(date,
					new String[] { Constants.DATE_PATTERN });
		}
		crawJobService.callSpider(SpiderRecordService.MANUAL_CALL,
				searchEngineId, markDate, findUrlSize, minThreadNum,
				maxThreadNum, arrayQueueSize);
		return "OK";
	}

	/**
	 * Tells one server of the cluster to stop a search engine's crawl task by
	 * calling its {@code stopLocal} HTTP interface.
	 * 
	 * @param ip address of the target server
	 * @param searchEngineId id of the search engine whose crawl is stopped
	 * @return "OK" once the remote interface has been called
	 * @throws IOException if the remote interface cannot be reached or read
	 */
	@RequestMapping("/stopRemote")
	@ResponseBody
	public String stopRemote(String ip, int searchEngineId) throws IOException {
		String u = StringUtils.replaceEach(
				STOP_LOCAL_THREAD_INTERFACE,
				new String[] { "<SERVER_IP_ADDRESS>", "<CONTEXT_PATH>",
						"<SEARCH_ENGINE_ID>" },
				new String[] { ip, serverService.getContextPath(),
						String.valueOf(searchEngineId) });
		String message = callRemoteInterface(u);
		logger.info("Stop remote crawl thread ip[" + ip + "],message["
				+ message + "]");
		return "OK";
	}

	/**
	 * Stops a search engine's crawl task on the local server, if one is
	 * currently running.
	 * 
	 * @param searchEngineId id of the search engine whose crawl is stopped
	 * @return "OK" regardless of whether a running task was found
	 * @throws ParseException
	 */
	@RequestMapping("/stopLocal/{searchEngineId}")
	@ResponseBody
	public String stopLocal(@PathVariable int searchEngineId)
			throws ParseException {
		logger.info("Will stop local crawl searchEngineId:[" + searchEngineId
				+ "]");
		CrawlService crawlThread = SpiderRecordService
				.takeRunning(searchEngineId);
		if (crawlThread != null) {
			crawlThread.stopThread();
			logger.info("Stoped local crawl searchEngineId:[" + searchEngineId
					+ "]");
		}
		return "OK";
	}

	/**
	 * Renders the crawl-pipeline test page.
	 * 
	 * @return model-and-view carrying the available search engines
	 */
	@RequestMapping("/test")
	public ModelAndView testForm() {
		Map<String, Object> model = new HashMap<String, Object>();
		model.put("engines", searchEngineService.findUIViews());
		return new ModelAndView("/admin/spider/test", model);
	}

	/**
	 * Runs an automated test of the crawl pipeline. Because target sites get
	 * redesigned, configured parameters and implementation classes go stale;
	 * this endpoint exercises the pipeline to surface such breakage.
	 * 
	 * @param searchEngineId id of the search engine to test
	 * @param postUrl URL to post against during the test
	 * @return model-and-view carrying the test messages
	 * @throws ClassNotFoundException if a configured implementation class is
	 *             missing
	 */
	@RequestMapping(value = "/test", method = RequestMethod.POST)
	public ModelAndView test(int searchEngineId, String postUrl)
			throws ClassNotFoundException {
		Map<String, Object> model = new HashMap<String, Object>();
		model.put("engines", searchEngineService.findUIViews());
		model.put("searchEngineId", searchEngineId);
		model.put("postUrl", postUrl);
		List<String[]> testMsgs = crawJobService.test(searchEngineId, postUrl);
		model.put("testMsgs", testMsgs);
		return new ModelAndView("/admin/spider/test", model);
	}

	/**
	 * Lists the crawl-result tables (named by site + date) that already exist
	 * in the database.
	 * 
	 * @return model-and-view carrying the table names
	 */
	@RequestMapping("/view_tables")
	public ModelAndView viewTables() {
		List<String> tables = crawJobService.findExistedTables();
		Map<String, Object> model = new HashMap<String, Object>();
		model.put("tables", tables);
		return new ModelAndView("/admin/spider/tables", model);
	}

	/**
	 * Shows the first n rows of one crawl-result table so extracted data can
	 * be spot-checked for correctness.
	 * 
	 * @param tableName name of the table to inspect
	 * @return model-and-view carrying the rows, total count and page size
	 * @throws ParseException
	 */
	@RequestMapping("/view/{tableName}")
	public ModelAndView view(@PathVariable String tableName)
			throws ParseException {
		int returnSize = 10;
		List<Record> jobs = crawJobService.findBeforeJobs(tableName,
				returnSize);
		int totalCount = crawJobService.countOnlineTable(tableName);
		Map<String, Object> model = new HashMap<String, Object>();
		model.put("tableName", tableName);
		model.put("totalCount", totalCount);
		model.put("returnSize", returnSize);
		model.put("jobs", jobs);
		return new ModelAndView("/admin/spider/view", model);
	}

}
